#' ---
#' title: "Local Suffrage Increases Citizenship Acquisition: Evidence from the European Union 
#' Replication Log: Spain - Trajectory Balancing & Event Study"
#' author: "Hannah M. Alarian"
#' date : "March 16, 2024"
#' ---

#Use file Spain.dta
#Variable information can be found in Codebook tab Spain

#####Libraries-------------------------------------------------------------
library(tidyverse)
library(ggplot2)
library(foreach)
library(doParallel)
require(parallel)
library(panelView)
library(devtools)
library(readstata13)
library(dplyr)
library(fixest)
library(modelsummary)
library(kableExtra)

library(kbal) #install via github #install_github("chadhazlett/KBAL")
library(tjbal) #install via github #install_github("xuyiqing/tjbal")

#Data Preparation--------------------------------------------------------------
#Set working directory to location of data. Call in data from working directory
#setwd()
spain <- read.dta13("~/Dropbox/Research/Under Review/Local Suffrage/JOP/Data/Spain/Replication/Spain.dta")
head(spain)

#Removing Missing Cases and Unbalanced
spain <- spain[spain$scou != "BIH" & spain$scou != "BLR" & spain$scou != "ISR" &
                 spain$scou != "SYR" & spain$scou != "IRQ" & spain$scou != "CUB" &
                 spain$scou != "TUR" & spain$scou!= "GEO", ]

#Re-Coding Treatment variable 
spain$treat <- 0
spain$treat[spain$municipal == 1 & spain$year >= 2010] <- 1
spain$treat[spain$scou =="CPV" & spain$year >= 2010] <- 1 #to run treatment at same time

#Classifying for Event Study
spain$treated <- 0
spain$treated[spain$scou == "ECU" | spain$scou == "BOL" | spain$scou == "PER" 
            | spain$scou == "COL" | spain$scou ==  "CHL" | spain$scou =="PRY" 
            | spain$scou == "CPV"] <- 1
spainT <- spain[spain$treated == 1,]

#Relabeling variables for graphs
spain <- spain %>%
  rename("Acquisitions" = "citacq",
    Residents = lpermres,
    GDPpc = sgdp,
    Unemployment = sunemploy,
    Democracy = spolity2,
    Colony = colony,
    Language = comlang_ethno,
    Perm.Res= permres
  )

#Subsetting for balance prior to 2012 & up to 2015 & including 2007
spain12 <- spain[spain$year >= 2008 & spain$year <= 2012, ]
spain12 <- spain12[spain12$scou %in% names(which(table(spain12$scou) > 4)), ]

spain15 <- spain[spain$year >= 2008, ]
spain15 <- spain15[spain15$scou %in% names(which(table(spain15$scou) > 7)), ]

spain7 <- spain[spain$year >= 2007, ]
spain7 <- spain7[spain7$scou %in% names(which(table(spain7$scou) > 8)), ]

spainT <- spainT[spainT$year >= 2008 & spainT$year <= 2012, ]

#Looking at data structure
panelview(Acquisitions ~ treat, data = spain12, show.id = c(1:49),
          index = c("scou", "year"), xlab = "Year", ylab = "Sending Country",
          axis.lab.gap = c(0,1), pre.post = TRUE)
with(spainT, table(year, treat))

#Replicating Analysis--------------------------------------------------------------
##Models##
#Average Treatment Effect on the Treated by Year (Mean Balanced)
out.mbal <- tjbal(Y= "Acquisitions", D = "treat", 
                  X = c("Colony", "Language","Unemployment"),
                  data = spain12,
                  X.avg.time = list(c(2008), c(2008), c(2008:2009)),
                  estimator = "mean",
                  index = c("scou", "year"), parallel = F,
                  demean = T, vce = "boot", nsims = 1000, seed = 1013, conf.lvl = .9)
#Trajectory Balancing - Maximizing Covariate Inclusion
out.mbal1r <- tjbal(Y= "Acquisitions", D = "treat", 
                    X = c("Colony", "Language","Unemployment", "Democracy", "GDPpc"),
                    data = spain12,
                    X.avg.time = list(c(2008), c(2008), c(2008:2009), c(2008:2009), c(2008:2009)), 
                    estimator = "mean",
                    index = c("scou", "year"), parallel = F,
                    demean = T, vce = "boot", nsims = 1000, seed = 1013, conf.lvl = .9)
#Trajectory Balancing with All Available Time Points (2007 - 2015)
out.mbal7 <- tjbal(Y= "Acquisitions", D = "treat", 
                   X = c("Colony", "Language","Unemployment", "Democracy", "GDPpc"),
                   data = spain7,
                   X.avg.time = list(c(2008), c(2008), c(2007:2009), c(2007:2009), c(2007:2009)), 
                   estimator = "mean",
                   index = c("scou", "year"), parallel = F,
                   demean = T, vce = "boot", nsims = 1000, seed = 1013, conf.lvl = .9)
#Permanent Residents Trajectory Balancing (2008 - 2015)
out.mbal15r2 <- tjbal(Y= "Acquisitions", D = "treat", 
                      X = c("Perm.Res"),
                      data = spain15,
                      X.avg.time = list(c(2008:2009)), 
                      estimator = "mean",
                      index = c("scou", "year"), parallel = F,
                      demean = T, vce = "boot", nsims = 1000, seed = 1013, conf.lvl = .9)
#Event Study
set.seed(1013) #setting seed for replication
M <- feols(citacq ~ i(year, ref = 2009), data = spainT,
           cluster = 'scode')
names(M$coefficients) <- c("2009", "2008", "2010", "2011", "2012")

#Figure 3--------------------------------------------------------------
#Panel a
plot(out.mbal, type = "ct", ylim = c(0, 15000), 
     main = "Treated and Counterfactual Averages\n",
     ylab = "Citizenship Acquisitions", xlab = "Year", count = F)
#Panel b
plot(out.mbal, ylab = "Citizenship Acquisitions", xlab = "Year", 
     main = "Average treatment Effect on the Treated \n90% CI", count = F)

#Appendix B.1.2--------------------------------------------------------------
print(out.mbal)

#Appendix C.1.2--------------------------------------------------------------
#Panel a
plot(out.mbal1r, type = "ct", ylim = c(0, 15000), main = "Treated and Counterfactual Averages\n",
     ylab = "Citizenship Acquisitions", xlab = "Year", count = F)
#Panel b
plot(out.mbal1r, ylab = "Citizenship Acquisitions", xlab = "Year",
     main = "Average treatment Effect on the Treated \n90% CI", count = F)
#Table
print(out.mbal1r)

#Appendix C.1.3--------------------------------------------------------------
#Panel a
plot(out.mbal7, type = "ct", ylim = c(0, 20000), main = "Treated and Counterfactual Averages\n",
     ylab = "Citizenship Acquisitions", xlab = "Year", count = F)
#Panel b
plot(out.mbal7, ylab = "Citizenship Acquisitions", xlab = "Year",
     main = "Average treatment Effect on the Treated \n90% CI", count = F)
#Table
print(out.mbal7)

#Appendix C.1.4--------------------------------------------------------------
#Panel a
plot(out.mbal15r2, type = "ct", ylim = c(0, 20000), main = "Treated and Counterfactual Averages\n",
     ylab = "Citizenship Acquisitions", xlab = "Year", count = F)
#Panel b
plot(out.mbal15r2, ylab = "Citizenship Acquisitions", xlab = "Year",
     main = "Average treatment Effect on the Treated \n90% CI", count = F)
#Table
print(out.mbal15r2)

#Appendix C.1.5--------------------------------------------------------------
#Standardized Difference in Weighted and Unweighted Coefficients
#Top Right panel
plot(out.mbal, type = "balance", main = "Covariate Balance\nMean\n(2008-2012)")
#Top Left panel
plot(out.mbal1r, type = "balance", main = "Covariate Balance\nMean\n(2008-2012)")
#Bottom Right
plot(out.mbal7, type = "balance", main = "Covariate Balance\nMean\n(2007-2015)")
#Bottom Left
plot(out.mbal15r2, type = "balance", main = "Covariate Balance\nMean\n(2008-2015)")


#Standardized Difference in Weighted and Unweighted Standard Deviations
#Top Right panel
sdm <- plot(out.mbal, type = "balance", stat = "sd",
            main = "Covariate Balance\nMean\n(2008-2012)")
sdm + scale_color_manual(values = c("lightcoral", "purple"))
#Top Left panel
sdmMax <- plot(out.mbal1r, type = "balance", stat = "sd",
               main = "Covariate Balance\nMean\n(2008-2012)")
sdmMax + scale_color_manual(values = c("lightcoral", "purple"))
#Bottom Right
sdk <- plot(out.mbal7, type = "balance", stat = "sd",
            main = "Covariate Balance\nMean\n(2007-2015)")
sdk + scale_color_manual(values = c("lightcoral", "purple"))
#Bottom Left
sdres <- plot(out.mbal15r2, type = "balance", stat = "sd",
              main = "Covariate Balance\nMean\n(2008-2015)")
sdres + scale_color_manual(values = c("lightcoral", "purple"))

#Appendix C.1.6--------------------------------------------------------------
coefplot(M, drop = '(Intercept)',
         pt.join = TRUE, ref = c("2009" = 3), ref.line = TRUE,
         main = "Effect of Suffrage on Citizenship Acquisition\n2010 Enfranchised Origins in Spain",
         xlab = "Year",
         dict = c("year::2008"="2008", "year::2010"="2010",
                  "year::2011"="2011", "year::2012"="2012"))
modelsummary(M,
             stars = T,
             coef_map = c("2008","2009","2010","2011","2012"),
             gof_omit = 'DF|Deviance|R2|Log.Lik.|BIC|Std. Errors')


